New

%load_ext autoreload
%autoreload 2
import os
import itertools
from functools import cache
from typing import List, Literal, Optional, Tuple

import ms3
import pandas as pd
import plotly.express as px
from dimcat import Pipeline, plotting

import utils

# Raise pandas' display limits so that long and wide frames are shown in full.
for option, limit in (("display.max_rows", 1000), ("display.max_columns", 500)):
    pd.set_option(option, limit)
# Everything this notebook writes goes to its own sub-folder of the shared output folder.
RESULTS_PATH = os.path.abspath(
    os.path.join(utils.OUTPUT_FOLDER, "couperin_study"),
)
os.makedirs(RESULTS_PATH, exist_ok=True)


def make_output_path(
    filename: str,
    extension=None,
    path=RESULTS_PATH,
) -> str:
    """Delegate to ``utils.make_output_path`` with this notebook's results folder as default.

    Args:
        filename: Passed on as ``filename``.
        extension: Passed on as ``extension``.
        path: Passed on as ``path``; defaults to RESULTS_PATH.

    Returns:
        Whatever ``utils.make_output_path`` returns for these arguments.
    """
    forwarded = {"filename": filename, "extension": extension, "path": path}
    return utils.make_output_path(**forwarded)


def save_figure_as(
    fig, filename, formats=("png", "pdf"), directory=RESULTS_PATH, **kwargs
):
    """Write ``fig`` via ``plotting.write_image``, once per requested format.

    Args:
        fig: Figure handed on to ``plotting.write_image``.
        filename: File name handed on to ``plotting.write_image``.
        formats: Iterable of format names, a single format name given as a string,
            or None. With None the figure is written once without an explicit
            ``format`` argument.
        directory: Target folder handed on to ``plotting.write_image``;
            defaults to RESULTS_PATH.
        **kwargs: Passed through unchanged to every ``plotting.write_image`` call.
    """
    if formats is None:
        plotting.write_image(fig, filename, directory, **kwargs)
        return
    if isinstance(formats, str):
        # A bare string would otherwise be iterated character by character
        # ("png" -> "p", "n", "g").
        formats = (formats,)
    for fmt in formats:
        plotting.write_image(fig, filename, directory, format=fmt, **kwargs)


def style_plotly(
    fig,
    save_as=None,
    xaxes: Optional[dict] = None,
    yaxes: Optional[dict] = None,
    match_facet_yaxes=False,
    **layout,
):
    """Apply the shared look to ``fig``, optionally save it, then show it.

    Args:
        fig: The figure to update in place.
        save_as: If truthy, the figure is passed to ``save_figure_as`` under this name.
        xaxes: Extra settings for ``fig.update_xaxes`` that override the default grid colour.
        yaxes: Extra settings for ``fig.update_yaxes`` that override the default grid colour.
        match_facet_yaxes: If True, every subplot's y-axis is set to match the axis
            computed for the first column of its row.
        **layout: Layout settings that override the entries of ``utils.STD_LAYOUT``.
    """
    merged_layout = dict(utils.STD_LAYOUT)
    merged_layout.update(layout)
    fig.update_layout(**merged_layout)

    # Same treatment for both axes: light grey grid unless the caller overrides it.
    for update_axes, overrides in ((fig.update_xaxes, xaxes), (fig.update_yaxes, yaxes)):
        axis_settings = {"gridcolor": "lightgrey"}
        axis_settings.update(overrides or {})
        update_axes(**axis_settings)

    if match_facet_yaxes:
        for row_number, grid_row in enumerate(fig._grid_ref, start=1):
            n_columns = len(grid_row)
            # Axis id derived from the row position: "y" + (columns per row * zero-based row + 1).
            row_anchor = "y" + str(n_columns * (row_number - 1) + 1)
            for col_number in range(1, n_columns + 1):
                fig.update_yaxes(row=row_number, col=col_number, matches=row_anchor)

    if save_as:
        save_figure_as(fig, save_as)
    fig.show()

Loading data

# Load the "couperin_concerts" corpus at release v2.2 through the project helper
# and display the resulting dataset object.
D = utils.get_dataset("couperin_concerts", corpus_release="v2.2")
D
Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'couperin_concerts': ["'couperin_concerts.measures' (MuseScoreMeasures)",
                                               "'couperin_concerts.notes' (MuseScoreNotes)",
                                               "'couperin_concerts.expanded' (MuseScoreHarmonies)",
                                               "'couperin_concerts.chords' (MuseScoreChords)",
                                               "'couperin_concerts.metadata' (Metadata)"]}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}

Grouping data

# Run the two-step pipeline (KeySlicer, then ModeGrouper) over the loaded dataset.
# NOTE(review): presumably this cuts every piece into local-key segments and groups
# the segments by mode -- confirm against the dimcat documentation.
pipeline = Pipeline(["KeySlicer", "ModeGrouper"])
grouped_D = pipeline.process(D)
grouped_D
SlicedGroupedDataset
====================
{'inputs': {'basepath': None,
            'packages': {'couperin_concerts': ["'couperin_concerts.measures' (MuseScoreMeasures)",
                                               "'couperin_concerts.notes' (MuseScoreNotes)",
                                               "'couperin_concerts.expanded' (MuseScoreHarmonies)",
                                               "'couperin_concerts.chords' (MuseScoreChords)",
                                               "'couperin_concerts.metadata' (Metadata)"]}},
 'outputs': {'basepath': None,
             'packages': {'features': ["'couperin_concerts.expanded.keyannotations' (KeyAnnotations)"]}},
 'pipeline': ['FeatureExtractor', 'KeySlicer', 'ModeGrouper']}
# Retrieve the "bassnotes" feature from the processed dataset and display its dataframe.
bass_notes = grouped_D.get_feature("bassnotes")
bass_notes.df
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice volta label pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type chord_tones added_tones root alt_label globalkey_is_minor localkey_is_minor globalkey_mode localkey_mode localkey_resolved localkey_and_mode root_roman relativeroot_resolved effective_localkey effective_localkey_resolved effective_localkey_is_minor pedal_resolved chord_and_mode chord_reduced chord_reduced_and_mode applied_to_numeral numeral_or_applied_to_numeral intervals_over_bass intervals_over_root scale_degrees scale_degrees_and_mode scale_degrees_major scale_degrees_minor bass_degree bass_degree_and_mode bass_degree_major bass_degree_minor bass_note_over_local_tonic globalkey localkey bass_note
mode corpus piece localkey_slice i
major couperin_concerts c01n01_prelude [0.0, 16.0) 0 1 0 0 2.00 0 1/2 4/4 1 1 <NA> G.I{ <NA> I I <NA> <NA> <NA> <NA> <NA> { M (0, 4, 1) () 0 <NA> False False major major I I, major I NaN I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0
1 2 1 2 2.00 0 0 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V NaN I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1
2 2 1 4 0.50 1/2 1/2 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I NaN I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4
3 2 1 9/2 0.50 5/8 5/8 4/4 1 1 <NA> I <NA> I I <NA> <NA> <NA> <NA> <NA> <NA> M (0, 4, 1) () 0 <NA> False False major major I I, major I NaN I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0
4 2 1 5 0.75 3/4 3/4 4/4 1 1 <NA> V(4) <NA> V(4) V <NA> <NA> 4 <NA> <NA> <NA> M (1, 0, 2) () 1 <NA> False False major major I I, major V NaN I I False <NA> V(4), major V V, major <NA> V (P4, P5) (P4, P5) (5, 1, 2) (5, 1, 2), major (5, 1, 2) (5, 1, 2) 5 5, major 5 5 P5 G I 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
minor couperin_concerts parnasse_07 [173.0, 212.0) 230 52 52 411/2 0.25 3/8 3/8 4/4 1 1 <NA> i64 <NA> i64 i <NA> 64 <NA> <NA> <NA> <NA> m (1, 0, -3) () 0 <NA> True True minor minor i i, minor i NaN i i True <NA> i64, minor i64 i64, minor <NA> i (P4, m6) (m3, P5) (5, 1, 3) (5, 1, 3), minor (5, 1, b3) (5, 1, 3) 5 5, minor 5 5 P5 b i 1
231 52 52 823/4 0.25 7/16 7/16 4/4 1 1 <NA> iio64 <NA> iio64 ii o 64 <NA> <NA> <NA> <NA> o (-4, 2, -1) () 2 <NA> True True minor minor i i, minor ii NaN i i True <NA> iio64, minor iio64 iio64, minor <NA> ii (a4, M6) (m3, d5) (6, 2, 4) (6, 2, 4), minor (b6, 2, 4) (6, 2, 4) 6 6, minor b6 6 m6 b i -4
232 52 52 206 1.00 1/2 1/2 4/4 1 1 <NA> i6 <NA> i6 i <NA> 6 <NA> <NA> <NA> <NA> m (-3, 1, 0) () 0 <NA> True True minor minor i i, minor i NaN i i True <NA> i6, minor i6 i6, minor <NA> i (M3, M6) (m3, P5) (3, 5, 1) (3, 5, 1), minor (b3, 5, 1) (3, 5, 1) 3 3, minor b3 3 m3 b i -3
233 52 52 207 1.00 3/4 3/4 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> True True minor minor i i, minor V NaN i i True <NA> V, minor V V, minor <NA> V (M3, P5) (M3, P5) (5, #7, 2) (5, #7, 2), minor (5, 7, 2) (5, #7, 2) 5 5, minor 5 5 P5 b i 1
234 53 53 208 4.00 0 0 4/4 1 1 <NA> i|PAC} <NA> i i <NA> <NA> <NA> <NA> PAC } m (0, -3, 1) () 0 <NA> True True minor minor i i, minor i NaN i i True <NA> i, minor i i, minor <NA> i (m3, P5) (m3, P5) (1, 3, 5) (1, 3, 5), minor (1, b3, 5) (1, 3, 5) 1 1, minor 1 1 P1 b i 0

8376 rows × 56 columns

# Inspect the first row's intervals_over_bass value.
bass_notes.intervals_over_bass.iloc[0]
('M3', 'P5')
# Retrieve the key annotations, print the number of rows per mode under a heading,
# and display the first rows.
local_keys = grouped_D.get_feature("KeyAnnotations")
utils.print_heading("Key Segments")
print(local_keys.groupby("mode").size().to_string())
local_keys.head()
Key Segments
------------

mode
major    279
minor    287
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice volta label globalkey_is_minor localkey_is_minor globalkey_mode localkey_mode localkey_resolved localkey_and_mode globalkey localkey
mode corpus piece localkey_slice i
major couperin_concerts c01n01_prelude [0.0, 16.0) 0 1 0 0 16.0 0 1/2 4/4 1 1 <NA> G.I{ False False major major I I, major G I
[22.5, 32.0) 22 7 6 45/2 9.5 1/8 1/8 4/4 1 1 <NA> V.V{ False False major major V V, major G V
[32.0, 40.0) 35 9 8 32 8.0 1/2 1/2 4/4 1 1 <NA> IV.ii6{ False False major major IV IV, major G IV
[40.0, 47.0) 41 11 9 40 7.0 1/2 1/2 4/4 1 1 <NA> V.V{ False False major major V V, major G V
[47.0, 98.0) 48 13 11 47 51.0 1/4 1/4 4/4 1 1 <NA> I.V65 False False major major I I, major G I
# For each kind of stepwise motion: scale degree -> the degree that follows it.
# The ascending minor line runs through the raised degrees "#6" and "#7".
succession_map = {
    "ascending_major": dict(zip("1234567", "2345671")),
    "ascending_minor": dict(
        zip(
            ("1", "2", "3", "4", "5", "#6", "#7"),
            ("2", "3", "4", "5", "#6", "#7", "1"),
        )
    ),
    "descending": dict(zip("1234567", "7123456")),
}


def inverse_dict(d):
    """Return a new dict that maps every value of ``d`` to its key.

    If several keys share a value, the key that comes last in ``d`` wins.
    """
    return dict(zip(d.values(), d.keys()))


# Mirror image of succession_map: scale degree -> the degree that precedes it
# in the respective kind of stepwise motion.
predecessor_map = {
    motion: inverse_dict(successors) for motion, successors in succession_map.items()
}


def make_precise_preceding_movement_column(df):
    """Refine ``preceding_movement`` into interval names and scale-wise directions.

    Expects a dataframe whose first index level holds the mode ("major" or "minor")
    and which contains the columns bass_degree, preceding_bass_degree,
    preceding_movement, and preceding_interval.

    Returns:
        A Series with the index of ``df``. It starts out as ``preceding_movement``
        with every "step" replaced by the row's ``preceding_interval``. Rows whose
        ``preceding_bass_degree`` equals the predecessor listed in ``predecessor_map``
        for the row's ``bass_degree`` are then labelled "ascending" (mode-specific
        map) or "descending".
    """
    preceding_movement_precise = df.preceding_movement.where(
        df.preceding_movement != "step", df.preceding_interval
    )
    # The ascending predecessor depends on the mode. Selecting by mask instead of
    # df.loc[["major"]] / df.loc[["minor"]] keeps the row order of df and does not
    # raise a KeyError when one of the two modes is absent.
    modes = df.index.get_level_values(0)
    expected_in_major = df.bass_degree.map(predecessor_map["ascending_major"]).astype(
        object
    )
    expected_in_minor = df.bass_degree.map(predecessor_map["ascending_minor"]).astype(
        object
    )
    # Rows that are neither major nor minor get a missing value and never match.
    expected_ascending_degree = expected_in_major.where(
        modes == "major", expected_in_minor.where(modes == "minor")
    )
    expected_descending_degree = df.bass_degree.map(predecessor_map["descending"])
    preceding_movement_precise = preceding_movement_precise.where(
        df.preceding_bass_degree != expected_ascending_degree, "ascending"
    )
    preceding_movement_precise = preceding_movement_precise.where(
        df.preceding_bass_degree != expected_descending_degree, "descending"
    )
    return preceding_movement_precise


def make_precise_subsequent_movement_column(df):
    """Refine ``subsequent_movement`` into interval names and scale-wise directions.

    Expects a dataframe whose first index level holds the mode ("major" or "minor")
    and which contains the columns bass_degree, subsequent_bass_degree,
    subsequent_movement, and subsequent_interval.

    Returns:
        A Series with the index of ``df``. It starts out as ``subsequent_movement``
        with every "step" replaced by the row's ``subsequent_interval``. Rows whose
        ``subsequent_bass_degree`` equals the successor listed in ``succession_map``
        for the row's ``bass_degree`` are then labelled "ascending" (mode-specific
        map) or "descending".
    """
    subsequent_movement_precise = df.subsequent_movement.where(
        df.subsequent_movement != "step", df.subsequent_interval
    )
    # The ascending successor depends on the mode. Selecting by mask instead of
    # df.loc[["major"]] / df.loc[["minor"]] keeps the row order of df and does not
    # raise a KeyError when one of the two modes is absent.
    modes = df.index.get_level_values(0)
    expected_in_major = df.bass_degree.map(succession_map["ascending_major"]).astype(
        object
    )
    expected_in_minor = df.bass_degree.map(succession_map["ascending_minor"]).astype(
        object
    )
    # Rows that are neither major nor minor get a missing value and never match.
    expected_ascending_degree = expected_in_major.where(
        modes == "major", expected_in_minor.where(modes == "minor")
    )
    expected_descending_degree = df.bass_degree.map(succession_map["descending"])
    subsequent_movement_precise = subsequent_movement_precise.where(
        df.subsequent_bass_degree != expected_ascending_degree, "ascending"
    )
    subsequent_movement_precise = subsequent_movement_precise.where(
        df.subsequent_bass_degree != expected_descending_degree, "descending"
    )
    return subsequent_movement_precise
# Put each bass note side by side with its neighbours from the same piece and
# local-key slice. Shifting within the groups leaves missing values where a slice
# has no previous / next row.
neighbour_groups = ["piece", "localkey_slice"]
preceding = bass_notes.groupby(neighbour_groups).shift()
subsequent = bass_notes.groupby(neighbour_groups).shift(-1)
preceding.columns = "preceding_" + preceding.columns
subsequent.columns = "subsequent_" + subsequent.columns
BN = pd.concat([bass_notes, preceding, subsequent], axis=1)

# Difference between neighbouring bass_note values, always "later minus earlier".
BN["preceding_iv"] = BN["bass_note"] - BN["preceding_bass_note"]
BN["subsequent_iv"] = BN["subsequent_bass_note"] - BN["bass_note"]

# The remaining columns are derived identically for both directions. One loop per
# derivation keeps the column order preceding_x, subsequent_x.
directions = ("preceding", "subsequent")
for direction in directions:
    BN[f"{direction}_interval"] = ms3.transform(
        BN[f"{direction}_iv"], ms3.fifths2iv, smallest=True
    )

step_sizes = (-5, -2, 2, 5)  # +m2, -M2, +M2, -m2
for direction in directions:
    iv = BN[f"{direction}_iv"]
    # Stays missing where there is no neighbour.
    BN[f"{direction}_iv_is_step"] = iv.isin(step_sizes).where(iv.notna())

for direction in directions:
    BN[f"{direction}_iv_is_0"] = BN[f"{direction}_iv"] == 0

for direction in directions:
    movement = BN[f"{direction}_iv_is_step"].map({True: "step", False: "leap"})
    movement = movement.where(~BN[f"{direction}_iv_is_0"], "same")
    BN[f"{direction}_movement"] = movement.where(
        BN[f"{direction}_iv"].notna(), "none"
    )

BN["preceding_movement_precise"] = make_precise_preceding_movement_column(BN)
BN["subsequent_movement_precise"] = make_precise_subsequent_movement_column(BN)

BN.head(15)
mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice volta label pedal chord numeral form figbass changes relativeroot cadence phraseend chord_type chord_tones added_tones root alt_label globalkey_is_minor localkey_is_minor globalkey_mode localkey_mode localkey_resolved localkey_and_mode root_roman relativeroot_resolved effective_localkey effective_localkey_resolved effective_localkey_is_minor pedal_resolved chord_and_mode chord_reduced chord_reduced_and_mode applied_to_numeral numeral_or_applied_to_numeral intervals_over_bass intervals_over_root scale_degrees scale_degrees_and_mode scale_degrees_major scale_degrees_minor bass_degree bass_degree_and_mode bass_degree_major bass_degree_minor bass_note_over_local_tonic globalkey localkey bass_note preceding_mc preceding_mn preceding_quarterbeats preceding_duration_qb preceding_mc_onset preceding_mn_onset preceding_timesig preceding_staff preceding_voice preceding_volta preceding_label preceding_pedal preceding_chord preceding_numeral preceding_form preceding_figbass preceding_changes preceding_relativeroot preceding_cadence preceding_phraseend preceding_chord_type preceding_chord_tones preceding_added_tones preceding_root preceding_alt_label preceding_globalkey_is_minor preceding_localkey_is_minor preceding_globalkey_mode preceding_localkey_mode preceding_localkey_resolved preceding_localkey_and_mode preceding_root_roman preceding_relativeroot_resolved preceding_effective_localkey preceding_effective_localkey_resolved preceding_effective_localkey_is_minor preceding_pedal_resolved preceding_chord_and_mode preceding_chord_reduced preceding_chord_reduced_and_mode preceding_applied_to_numeral preceding_numeral_or_applied_to_numeral preceding_intervals_over_bass preceding_intervals_over_root preceding_scale_degrees preceding_scale_degrees_and_mode preceding_scale_degrees_major preceding_scale_degrees_minor preceding_bass_degree preceding_bass_degree_and_mode preceding_bass_degree_major 
preceding_bass_degree_minor preceding_bass_note_over_local_tonic preceding_globalkey preceding_localkey preceding_bass_note subsequent_mc subsequent_mn subsequent_quarterbeats subsequent_duration_qb subsequent_mc_onset subsequent_mn_onset subsequent_timesig subsequent_staff subsequent_voice subsequent_volta subsequent_label subsequent_pedal subsequent_chord subsequent_numeral subsequent_form subsequent_figbass subsequent_changes subsequent_relativeroot subsequent_cadence subsequent_phraseend subsequent_chord_type subsequent_chord_tones subsequent_added_tones subsequent_root subsequent_alt_label subsequent_globalkey_is_minor subsequent_localkey_is_minor subsequent_globalkey_mode subsequent_localkey_mode subsequent_localkey_resolved subsequent_localkey_and_mode subsequent_root_roman subsequent_relativeroot_resolved subsequent_effective_localkey subsequent_effective_localkey_resolved subsequent_effective_localkey_is_minor subsequent_pedal_resolved subsequent_chord_and_mode subsequent_chord_reduced subsequent_chord_reduced_and_mode subsequent_applied_to_numeral subsequent_numeral_or_applied_to_numeral subsequent_intervals_over_bass subsequent_intervals_over_root subsequent_scale_degrees subsequent_scale_degrees_and_mode subsequent_scale_degrees_major subsequent_scale_degrees_minor subsequent_bass_degree subsequent_bass_degree_and_mode subsequent_bass_degree_major subsequent_bass_degree_minor subsequent_bass_note_over_local_tonic subsequent_globalkey subsequent_localkey subsequent_bass_note preceding_iv subsequent_iv preceding_interval subsequent_interval preceding_iv_is_step subsequent_iv_is_step preceding_iv_is_0 subsequent_iv_is_0 preceding_movement subsequent_movement preceding_movement_precise subsequent_movement_precise
mode corpus piece localkey_slice i
major couperin_concerts c01n01_prelude [0.0, 16.0) 0 1 0 0 2.00 0 1/2 4/4 1 1 <NA> G.I{ <NA> I I <NA> <NA> <NA> <NA> <NA> { M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 NaN <NA> NaN NaN NaN NaN <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN <NA> NaN <NA> NaN NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN <NA> <NA> <NA> 2.0 1 2 2.00 0 0 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 <NA> 1 <NA> -P4 <NA> False <NA> False none leap none leap
1 2 1 2 2.00 0 0 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 1.0 0 0 2.00 0 1/2 4/4 1 1 <NA> G.I{ <NA> I I <NA> <NA> <NA> <NA> <NA> { M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 2.0 1 4 0.50 1/2 1/2 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 1 3 -P4 -m3 False False False False leap leap leap leap
2 2 1 4 0.50 1/2 1/2 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 2.0 1 2 2.00 0 0 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 2.0 1 9/2 0.50 5/8 5/8 4/4 1 1 <NA> I <NA> I I <NA> <NA> <NA> <NA> <NA> <NA> M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 3 -4 -m3 -M3 False False False False leap leap leap leap
3 2 1 9/2 0.50 5/8 5/8 4/4 1 1 <NA> I <NA> I I <NA> <NA> <NA> <NA> <NA> <NA> M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 2.0 1 4 0.50 1/2 1/2 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 2.0 1 5 0.75 3/4 3/4 4/4 1 1 <NA> V(4) <NA> V(4) V <NA> <NA> 4 <NA> <NA> <NA> M (1, 0, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V(4), major V V, major <NA> V (P4, P5) (P4, P5) (5, 1, 2) (5, 1, 2), major (5, 1, 2) (5, 1, 2) 5 5, major 5 5 P5 G I 1 -4 1 -M3 -P4 False False False False leap leap leap leap
4 2 1 5 0.75 3/4 3/4 4/4 1 1 <NA> V(4) <NA> V(4) V <NA> <NA> 4 <NA> <NA> <NA> M (1, 0, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V(4), major V V, major <NA> V (P4, P5) (P4, P5) (5, 1, 2) (5, 1, 2), major (5, 1, 2) (5, 1, 2) 5 5, major 5 5 P5 G I 1 2.0 1 9/2 0.50 5/8 5/8 4/4 1 1 <NA> I <NA> I I <NA> <NA> <NA> <NA> <NA> <NA> M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 2.0 1 23/4 0.25 15/16 15/16 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 1 0 -P4 P1 False False False True leap same leap same
5 2 1 23/4 0.25 15/16 15/16 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 2.0 1 5 0.75 3/4 3/4 4/4 1 1 <NA> V(4) <NA> V(4) V <NA> <NA> 4 <NA> <NA> <NA> M (1, 0, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V(4), major V V, major <NA> V (P4, P5) (P4, P5) (5, 1, 2) (5, 1, 2), major (5, 1, 2) (5, 1, 2) 5 5, major 5 5 P5 G I 1 3.0 2 6 2.00 0 0 4/4 1 1 <NA> I|IAC} <NA> I I <NA> <NA> <NA> <NA> IAC } M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 0 -1 P1 P4 False False True False same leap same leap
6 3 2 6 2.00 0 0 4/4 1 1 <NA> I|IAC} <NA> I I <NA> <NA> <NA> <NA> IAC } M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 2.0 1 23/4 0.25 15/16 15/16 4/4 1 1 <NA> V <NA> V V <NA> <NA> <NA> <NA> <NA> <NA> M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 3.0 2 8 1.00 1/2 1/2 4/4 1 1 <NA> vi{ <NA> vi vi <NA> <NA> <NA> <NA> <NA> { m (3, 0, 4) () 3 <NA> False False major major I I, major vi <NA> I I False <NA> vi, major vi vi, major <NA> vi (m3, P5) (m3, P5) (6, 1, 3) (6, 1, 3), major (6, 1, 3) (#6, 1, #3) 6 6, major 6 #6 M6 G I 3 -1 3 P4 -m3 False False False False leap leap leap leap
7 3 2 8 1.00 1/2 1/2 4/4 1 1 <NA> vi{ <NA> vi vi <NA> <NA> <NA> <NA> <NA> { m (3, 0, 4) () 3 <NA> False False major major I I, major vi <NA> I I False <NA> vi, major vi vi, major <NA> vi (m3, P5) (m3, P5) (6, 1, 3) (6, 1, 3), major (6, 1, 3) (#6, 1, #3) 6 6, major 6 #6 M6 G I 3 3.0 2 6 2.00 0 0 4/4 1 1 <NA> I|IAC} <NA> I I <NA> <NA> <NA> <NA> IAC } M (0, 4, 1) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I, major I I, major <NA> I (M3, P5) (M3, P5) (1, 3, 5) (1, 3, 5), major (1, 3, 5) (1, #3, 5) 1 1, major 1 1 P1 G I 0 3.0 2 9 1.00 3/4 3/4 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 3 1 -m3 -P4 False False False False leap leap leap leap
8 3 2 9 1.00 3/4 3/4 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 3.0 2 8 1.00 1/2 1/2 4/4 1 1 <NA> vi{ <NA> vi vi <NA> <NA> <NA> <NA> <NA> { m (3, 0, 4) () 3 <NA> False False major major I I, major vi <NA> I I False <NA> vi, major vi vi, major <NA> vi (m3, P5) (m3, P5) (6, 1, 3) (6, 1, 3), major (6, 1, 3) (#6, 1, #3) 6 6, major 6 #6 M6 G I 3 4.0 3 10 1.00 0 0 4/4 1 1 <NA> V6 <NA> V6 V <NA> 6 <NA> <NA> <NA> <NA> M (5, 2, 1) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V6, major V6 V6, major <NA> V (m3, m6) (M3, P5) (7, 2, 5) (7, 2, 5), major (7, 2, 5) (#7, 2, 5) 7 7, major 7 #7 M7 G I 5 1 1 -P4 -P4 False False False False leap leap leap leap
9 4 3 10 1.00 0 0 4/4 1 1 <NA> V6 <NA> V6 V <NA> 6 <NA> <NA> <NA> <NA> M (5, 2, 1) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V6, major V6 V6, major <NA> V (m3, m6) (M3, P5) (7, 2, 5) (7, 2, 5), major (7, 2, 5) (#7, 2, 5) 7 7, major 7 #7 M7 G I 5 3.0 2 9 1.00 3/4 3/4 4/4 1 1 <NA> I6 <NA> I6 I <NA> 6 <NA> <NA> <NA> <NA> M (4, 1, 0) () 0 <NA> False False major major I I, major I <NA> I I False <NA> I6, major I6 I6, major <NA> I (m3, m6) (M3, P5) (3, 5, 1) (3, 5, 1), major (3, 5, 1) (#3, 5, 1) 3 3, major 3 #3 M3 G I 4 4.0 3 11 1.00 1/4 1/4 4/4 1 1 <NA> vi%43/V <NA> vi%43/V vi % 43 <NA> V <NA> <NA> %7 (-2, 2, 4, 1) () 4 iii%43 False False major major I I, major vi/V V V/I V False <NA> vi%43/V, major vi%43/V vi%43/V, major V V (M3, a4, M6) (m3, d5, m7) (b7, 2, 3, 5) (b7, 2, 3, 5), major (b7, 2, 3, 5) (7, 2, #3, 5) b7 b7, major b7 7 m7 G I -2 1 -7 -P4 d1 False False False False leap leap leap leap
10 4 3 11 1.00 1/4 1/4 4/4 1 1 <NA> vi%43/V <NA> vi%43/V vi % 43 <NA> V <NA> <NA> %7 (-2, 2, 4, 1) () 4 iii%43 False False major major I I, major vi/V V V/I V False <NA> vi%43/V, major vi%43/V vi%43/V, major V V (M3, a4, M6) (m3, d5, m7) (b7, 2, 3, 5) (b7, 2, 3, 5), major (b7, 2, 3, 5) (7, 2, #3, 5) b7 b7, major b7 7 m7 G I -2 4.0 3 10 1.00 0 0 4/4 1 1 <NA> V6 <NA> V6 V <NA> 6 <NA> <NA> <NA> <NA> M (5, 2, 1) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V6, major V6 V6, major <NA> V (m3, m6) (M3, P5) (7, 2, 5) (7, 2, 5), major (7, 2, 5) (#7, 2, 5) 7 7, major 7 #7 M7 G I 5 4.0 3 12 1.00 1/2 1/2 4/4 1 1 <NA> ii7/V <NA> ii7/V ii <NA> 7 <NA> V <NA> <NA> mm7 (3, 0, 4, 1) () 3 <NA> False False major major I I, major ii/V V V/I V False <NA> ii7/V, major ii7/V ii7/V, major V V (m3, P5, m7) (m3, P5, m7) (6, 1, 3, 5) (6, 1, 3, 5), major (6, 1, 3, 5) (#6, 1, #3, 5) 6 6, major 6 #6 M6 G I 3 -7 5 d1 -m2 False True False False leap step leap -m2
11 4 3 12 1.00 1/2 1/2 4/4 1 1 <NA> ii7/V <NA> ii7/V ii <NA> 7 <NA> V <NA> <NA> mm7 (3, 0, 4, 1) () 3 <NA> False False major major I I, major ii/V V V/I V False <NA> ii7/V, major ii7/V ii7/V, major V V (m3, P5, m7) (m3, P5, m7) (6, 1, 3, 5) (6, 1, 3, 5), major (6, 1, 3, 5) (#6, 1, #3, 5) 6 6, major 6 #6 M6 G I 3 4.0 3 11 1.00 1/4 1/4 4/4 1 1 <NA> vi%43/V <NA> vi%43/V vi % 43 <NA> V <NA> <NA> %7 (-2, 2, 4, 1) () 4 iii%43 False False major major I I, major vi/V V V/I V False <NA> vi%43/V, major vi%43/V vi%43/V, major V V (M3, a4, M6) (m3, d5, m7) (b7, 2, 3, 5) (b7, 2, 3, 5), major (b7, 2, 3, 5) (7, 2, #3, 5) b7 b7, major b7 7 m7 G I -2 4.0 3 13 1.00 3/4 3/4 4/4 1 1 <NA> V43/V <NA> V43/V V <NA> 43 <NA> V <NA> <NA> Mm7 (3, 0, 2, 6) () 2 <NA> False False major major I I, major V/V V V/I V False <NA> V43/V, major V43/V V43/V, major V V (m3, P4, M6) (M3, P5, m7) (6, 1, 2, #4) (6, 1, 2, #4), major (6, 1, 2, #4) (#6, 1, 2, #4) 6 6, major 6 #6 M6 G I 3 5 0 -m2 P1 True False False True step same -m2 same
12 4 3 13 1.00 3/4 3/4 4/4 1 1 <NA> V43/V <NA> V43/V V <NA> 43 <NA> V <NA> <NA> Mm7 (3, 0, 2, 6) () 2 <NA> False False major major I I, major V/V V V/I V False <NA> V43/V, major V43/V V43/V, major V V (m3, P4, M6) (M3, P5, m7) (6, 1, 2, #4) (6, 1, 2, #4), major (6, 1, 2, #4) (#6, 1, 2, #4) 6 6, major 6 #6 M6 G I 3 4.0 3 12 1.00 1/2 1/2 4/4 1 1 <NA> ii7/V <NA> ii7/V ii <NA> 7 <NA> V <NA> <NA> mm7 (3, 0, 4, 1) () 3 <NA> False False major major I I, major ii/V V V/I V False <NA> ii7/V, major ii7/V ii7/V, major V V (m3, P5, m7) (m3, P5, m7) (6, 1, 3, 5) (6, 1, 3, 5), major (6, 1, 3, 5) (#6, 1, #3, 5) 6 6, major 6 #6 M6 G I 3 5.0 4 14 2.00 0 0 4/4 1 1 <NA> V|HC.TEN} <NA> V V <NA> <NA> <NA> <NA> HC.TEN } M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 0 -2 P1 -M2 False True True False same step same descending
13 5 4 14 2.00 0 0 4/4 1 1 <NA> V|HC.TEN} <NA> V V <NA> <NA> <NA> <NA> HC.TEN } M (1, 5, 2) () 1 <NA> False False major major I I, major V <NA> I I False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G I 1 4.0 3 13 1.00 3/4 3/4 4/4 1 1 <NA> V43/V <NA> V43/V V <NA> 43 <NA> V <NA> <NA> Mm7 (3, 0, 2, 6) () 2 <NA> False False major major I I, major V/V V V/I V False <NA> V43/V, major V43/V V43/V, major V V (m3, P4, M6) (M3, P5, m7) (6, 1, 2, #4) (6, 1, 2, #4), major (6, 1, 2, #4) (#6, 1, 2, #4) 6 6, major 6 #6 M6 G I 3 NaN <NA> NaN NaN NaN NaN <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN <NA> NaN <NA> NaN NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN <NA> <NA> <NA> -2 <NA> -M2 <NA> True <NA> False <NA> step none descending none
[22.5, 32.0) 22 7 6 45/2 1.00 1/8 1/8 4/4 1 1 <NA> V.V{ <NA> V V <NA> <NA> <NA> <NA> <NA> { M (1, 5, 2) () 1 <NA> False False major major V V, major V NaN V V False <NA> V, major V V, major <NA> V (M3, P5) (M3, P5) (5, 7, 2) (5, 7, 2), major (5, 7, 2) (5, #7, 2) 5 5, major 5 5 P5 G V 1 NaN <NA> NaN NaN NaN NaN <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN <NA> NaN <NA> NaN NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN <NA> <NA> <NA> 7.0 6 47/2 0.50 3/8 3/8 4/4 1 1 <NA> I64 <NA> I64 I <NA> 64 <NA> <NA> <NA> <NA> M (1, 0, 4) () 0 <NA> False False major major V V, major I NaN V V False <NA> I64, major I64 I64, major <NA> I (P4, M6) (M3, P5) (5, 1, 3) (5, 1, 3), major (5, 1, 3) (5, 1, #3) 5 5, major 5 5 P5 G V 1 <NA> 0 <NA> P1 <NA> False <NA> True none same none same
# Keep only the first row for every interval name (and no missing ones), so that
# each name appears exactly once in the lookup below.
subsequent_interval_names = BN["subsequent_interval"]
ignore_mask = subsequent_interval_names.isna() | subsequent_interval_names.duplicated()
# mapping that allows to order the x-axis with intervals according to LoF
first_occurrences = BN.loc[~ignore_mask, ["subsequent_interval", "subsequent_iv"]]
interval2fifths = first_occurrences.set_index("subsequent_interval")[
    "subsequent_iv"
].sort_values()

Bass movement

# Share of every subsequent interval among all rows of a mode.
mode_wise_share = BN.groupby("mode")["subsequent_interval"].value_counts(
    normalize=True
)
# The same share computed per piece; its standard error of the mean per
# mode and interval serves as the error bar.
piece_wise_share = BN.groupby(["piece", "mode"])["subsequent_interval"].value_counts(
    normalize=True
)
share_std_err = (
    piece_wise_share.groupby(["mode", "subsequent_interval"]).sem().rename("std_err")
)
interval_data = pd.concat([mode_wise_share, share_std_err], axis=1).reset_index()

interval_bar_settings = {
    "x": "subsequent_interval",
    "y": "proportion",
    "color": "mode",
    "barmode": "group",
    "error_y": "std_err",
    "color_discrete_map": utils.MAJOR_MINOR_COLORS,
    "labels": {"subsequent_interval": "Interval"},
    "title": "Mode-wise proportion of how often a bass note moves by an interval",
    # x-axis categories follow the index order of interval2fifths.
    "category_orders": {"subsequent_interval": interval2fifths.index},
}
fig = px.bar(interval_data, **interval_bar_settings)
style_plotly(fig, "how_often_a_bass_note_moves_by_an_interval")
# Switch between the refined movement labels ("ascending", "descending", interval
# names) and the coarse ones stored in "subsequent_movement".
PRECISE_CATEGORIES = True

subsequent_movement = (
    "subsequent_movement_precise" if PRECISE_CATEGORIES else "subsequent_movement"
)
movement_data = pd.concat(
    [
        # Share of every movement category among all rows of a mode.
        BN.groupby("mode")[subsequent_movement].value_counts(
            normalize=True, dropna=False
        ),
        # Standard error of the mean of the piece-wise shares, used as error bar.
        BN.groupby(["piece", "mode"])[subsequent_movement]
        .value_counts(normalize=True, dropna=False)
        .groupby(["mode", subsequent_movement])
        .sem()
        .rename("std_err"),
    ],
    axis=1,
).reset_index()
# dropna=False keeps missing categories as a group; give them a printable label.
movement_data[subsequent_movement] = movement_data[subsequent_movement].fillna("none")
fig = px.bar(
    movement_data,
    x=subsequent_movement,
    y="proportion",
    color="mode",
    barmode="group",
    error_y="std_err",
    color_discrete_map=utils.MAJOR_MINOR_COLORS,
    labels={subsequent_movement: "Movement"},
    title="Mode-wise proportion of a bass note moving in a certain manner",
    # No category_orders here: the interval ordering used for the interval plot is
    # keyed on "subsequent_interval", a column this figure does not plot, so passing
    # it had no effect.
)
style_plotly(fig, save_as="mode-wise_bass_motion")
def make_sankey_data(
    five_major, color_edges=True, precise=True
) -> Tuple[pd.DataFrame, List[str], List[str]] | Tuple[pd.DataFrame, List[str]]:
    preceding_movement = (
        "preceding_movement_precise" if precise else "preceding_movement"
    )
    subsequent_movement = (
        "subsequent_movement_precise" if precise else "subsequent_movement"
    )
    type_counts = five_major["intervals_over_bass"].value_counts()
    preceding_movement_counts = five_major[preceding_movement].value_counts()
    subsequent_movement_counts = five_major[subsequent_movement].value_counts()
    preceding_links = five_major.groupby(
        [preceding_movement]
    ).intervals_over_bass.value_counts()
    subsequent_links = five_major.groupby(
        [subsequent_movement]
    ).intervals_over_bass.value_counts()

    node_labels = []
    label_ids = dict()
    for key, node_sizes in (
        ("preceding", preceding_movement_counts),
        ("intervals", type_counts),
        ("subsequent", subsequent_movement_counts),
    ):
        for label in node_sizes.index:
            label_id = len(node_labels)
            node_labels.append(str(label))
            label_ids[(key, label)] = label_id

    edge_columns = ["source", "target", "value"]
    if color_edges:
        node_colors = utils.make_evenly_distributed_color_map(node_labels)
        edge_columns.append("color")

    links = []
    for (prec_mov, iv), cnt in preceding_links.items():
        source_id = label_ids.get(("preceding", prec_mov))
        target_id = label_ids.get(("intervals", iv))
        if color_edges:
            edge_color = node_colors[source_id]
            links.append((source_id, target_id, cnt, edge_color))
        else:
            links.append((source_id, target_id, cnt))

    for (subs_mov, iv), cnt in subsequent_links.items():
        source_id = label_ids.get(("intervals", iv))
        target_id = label_ids.get(("subsequent", subs_mov))
        if color_edges:
            edge_color = node_colors[target_id]
            links.append((source_id, target_id, cnt, edge_color))
        else:
            links.append((source_id, target_id, cnt))

    edge_data = pd.DataFrame(links, columns=edge_columns)
    if color_edges:
        return edge_data, node_labels, node_colors
    return edge_data, node_labels


def make_bass_degree_sankey(
    bass_degree: str | int, mode: Literal["major", "minor"], **layout
):
    """Create the Sankey diagram for one bass degree in one mode.

    Args:
        bass_degree: Scale degree of the bass, e.g. ``1`` or ``"#6"``. Integers
            are accepted and converted to their string form before the lookup.
        mode: Label used to select rows from ``BN`` via ``BN.loc[mode]``.
        **layout: Passed on to ``utils.make_sankey``.

    Returns:
        The figure returned by ``utils.make_sankey``.
    """
    # Normalise once and bind through `@` so that the value never has to be
    # spliced into (and quoted within) the query string.
    degree = str(bass_degree)
    selection = BN.loc[mode].query("bass_degree == @degree")
    edge_data, node_labels, node_colors = make_sankey_data(selection)
    return utils.make_sankey(edge_data, node_labels, node_color=node_colors, **layout)

Intervals over bass degree 1#

Major#

make_bass_degree_sankey(bass_degree=1, mode="major")

Minor#

make_bass_degree_sankey(bass_degree=1, mode="minor")

Intervals over bass degree 2#

Major#

make_bass_degree_sankey(bass_degree=2, mode="major")

Minor#

make_bass_degree_sankey(bass_degree=2, mode="minor")

Intervals over bass degree 3#

Major#

make_bass_degree_sankey(bass_degree=3, mode="major")

Minor#

make_bass_degree_sankey(bass_degree=3, mode="minor")

Intervals over bass degree 4#

Major#

make_bass_degree_sankey(bass_degree=4, mode="major")

Minor#

make_bass_degree_sankey(bass_degree=4, mode="minor")

Intervals over bass degree 5#

Major#

make_bass_degree_sankey(bass_degree=5, mode="major")

Minor#

make_bass_degree_sankey(bass_degree=5, mode="minor")

Intervals over bass degree 6#

Major#

make_bass_degree_sankey(bass_degree=6, mode="major")

Minor (ascending)#

make_bass_degree_sankey(bass_degree="#6", mode="minor")

Minor (descending)#

make_bass_degree_sankey(bass_degree=6, mode="minor")

Intervals over bass degree 7#

Major#

make_bass_degree_sankey(bass_degree=7, mode="major")

Minor (ascending)#

make_bass_degree_sankey(bass_degree="#7", mode="minor")

Minor (descending)#

make_bass_degree_sankey(bass_degree=7, mode="minor")

Explanatory power of the RoO#

# Most frequent interval combination over each bass degree, separately per mode.
BN.groupby(["mode", "bass_degree"])["intervals_over_bass"].apply(
    lambda intervals: intervals.value_counts().idxmax()
)
mode   bass_degree
major  #1             (m3, d5, m6)
       #4             (m3, d5, m6)
       #5             (m3, d5, d7)
       1                  (M3, P5)
       2              (m3, P4, M6)
       3                  (m3, m6)
       4                  (M3, P5)
       5                  (M3, P5)
       6                  (m3, P5)
       7              (m3, d5, m6)
       b3             (M3, a5, M7)
       b7             (M2, a4, M6)
minor  #3             (m3, d5, m6)
       #4             (m3, d5, m6)
       #6                 (m3, m6)
       #7             (m3, d5, m6)
       1                  (m3, P5)
       2              (m3, P4, M6)
       3                  (M3, M6)
       4              (m3, P5, M6)
       5                  (M3, P5)
       6              (M3, a4, M6)
       7                  (M3, M6)
Name: intervals_over_bass, dtype: object
# Interval combinations over the bass, under the author's shorthand names.
# These tuples are the values matched against BN.intervals_over_bass.
# NOTE(review): `min` shadows the builtin of the same name for the rest of the
# session; the name is kept because cells outside this one may refer to it.
maj = ("M3", "P5")
maj6 = ("m3", "m6")
min = ("m3", "P5")
min6 = ("M3", "M6")
Mm56 = ("m3", "d5", "m6")
Mm34 = ("m3", "P4", "M6")
Mm24 = ("M2", "a4", "M6")
mm56 = ("M3", "P5", "M6")
hdim56 = ("m3", "P5", "M6")
hdim34 = ("M3", "a4", "M6")

# One (bass degree, intervals) pair per step of the scale, for the ascending
# and descending scale in each mode. The trailing remarks record how each
# pair relates to the most frequent combination found in the data
# (ascending lists) or to the ascending list (descending lists).
regole = {
    "ascending_major": [
        ("1", maj),  # most frequent
        ("2", Mm34),  # most frequent
        ("3", maj6),  # most frequent
        ("4", mm56),  # not most frequent
        ("5", maj),  # most frequent
        # NOTE(review): (M3, M6) over degree 6 leaves the major scale;
        # confirm that maj6 was not intended here.
        ("6", min6),  # not most frequent
        ("7", Mm56),  # most frequent
    ],
    "descending_major": [
        ("1", maj),  # same
        ("7", maj6),  # different, not most frequent
        ("6", Mm34),  # different, not most frequent either
        ("5", maj),  # same
        ("4", Mm24),  # different, not most frequent either
        ("3", maj6),  # same
        ("2", Mm34),  # same
    ],
    "ascending_minor": [
        ("1", min),  # most frequent
        ("2", Mm34),  # most frequent
        ("3", min6),  # most frequent
        ("4", hdim56),  # most frequent
        ("5", maj),  # most frequent
        ("#6", maj6),  # most frequent
        ("#7", Mm56),  # most frequent
    ],
    "descending_minor": [
        ("1", min),  # same
        ("7", min6),  # different, most frequent
        ("6", hdim34),  # different, most frequent
        ("5", maj),  # same
        ("4", Mm24),  # different, not most frequent
        ("3", min6),  # same
        ("2", Mm34),  # same
    ],
}
@cache
def get_base_df(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    query: Optional[str] = None,
):
    """Return the subset of ``BN`` that coverage values are computed against.

    Results are memoised per ``(basis, query)``. Because the function reads the
    module-level ``BN``, call ``get_base_df.cache_clear()`` after ``BN`` has
    been replaced. The returned frame is shared between callers and should not
    be modified in place.

    Args:
        basis: ``"<mode>_<selection>"`` where selection is ``"all"`` (every
            bass note of the mode) or ``"diatonic"`` (only the degrees listed
            below for that mode).
        query: Optional ``DataFrame.query`` expression applied on top.

    Returns:
        The selected rows of ``BN``.

    Raises:
        ValueError: If ``basis`` is not of the form ``"<mode>_<selection>"``,
            if the selection is unknown, or if no diatonic degrees are defined
            for the mode.
    """
    try:
        mode, selection = basis.split("_")
    except (AttributeError, TypeError, ValueError) as err:
        # not a string, or not exactly one underscore
        raise ValueError(f"Invalid keyword for basis: {basis!r}") from err
    base = BN.loc[[mode]]
    if selection == "all":
        result = base
    elif selection == "diatonic":
        diatonic_queries = {
            "major": "bass_degree in ('1', '2', '3', '4', '5', '6', '7')",
            "minor": "bass_degree in ('1', '2', '3', '4', '5', '6', '#6', '7', '#7')",
        }
        if mode not in diatonic_queries:
            # previously fell through and failed on the unbound `result`
            raise ValueError(f"No diatonic bass degrees defined for mode: {mode!r}")
        result = base.query(diatonic_queries[mode])
    else:
        raise ValueError(f"Unknown keyword for selection: {selection!r}")
    if query:
        result = result.query(query)
    return result


@cache
def get_bass_degree_mask(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    bass_degree: str,
    query: Optional[str] = None,
):
    """Mark the rows of the base frame whose ``bass_degree`` equals the given one.

    The base frame is ``get_base_df(basis, query=query)``; the result is
    memoised per argument combination.
    """
    return get_base_df(basis, query=query)["bass_degree"] == bass_degree


@cache
def get_intervals_mask(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    intervals: tuple,
    query: Optional[str] = None,
):
    """Mark the rows of the base frame whose ``intervals_over_bass`` equals ``intervals``.

    The base frame is ``get_base_df(basis, query=query)``; the result is
    memoised per argument combination.
    """
    # Deliberately `==` rather than `.eq()`: the tuple is compared as one value.
    return get_base_df(basis, query=query)["intervals_over_bass"] == intervals


@cache
def get_chord_mask(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    bass_degree: str,
    intervals: tuple,
    query: Optional[str] = None,
):
    """Mark the rows of the base frame showing ``intervals`` over ``bass_degree``.

    Combines the (memoised) bass-degree mask and intervals mask with a logical
    AND.
    """
    return get_bass_degree_mask(
        basis=basis, bass_degree=bass_degree, query=query
    ) & get_intervals_mask(basis=basis, intervals=intervals, query=query)


@cache
def get_chord_vocabulary_mask(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    vocabulary: Tuple[Tuple[str, tuple], ...],
    query: Optional[str] = None,
) -> pd.Series:
    """Mark the rows of the base frame that match any chord of ``vocabulary``.

    Args:
        basis: Selects the base frame, see ``get_base_df``.
        vocabulary: Tuple of ``(bass_degree, intervals)`` pairs.
        query: Optional query restricting the base frame.

    Returns:
        Series of dtype "boolean" on the index of the base frame; all False
        for an empty vocabulary.
    """
    row_index = get_base_df(basis, query=query).index
    covered = pd.Series(False, index=row_index, dtype="boolean")
    for degree, chord_intervals in vocabulary:
        covered |= get_chord_mask(
            basis=basis, bass_degree=degree, intervals=chord_intervals, query=query
        )
    return covered


def inspect(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    vocabulary: Tuple[Tuple[str, tuple], ...],
    query: Optional[str] = None,
) -> pd.DataFrame:
    """Return the rows of the base frame that are covered by ``vocabulary``."""
    covered = get_chord_vocabulary_mask(
        basis=basis, vocabulary=vocabulary, query=query
    )
    return get_base_df(basis, query=query)[covered]


def get_vocabulary_coverage(
    basis: Literal[
        "major_all", "minor_all", "major_diatonic", "minor_diatonic"
    ],  # minor_diatonic includes 6, #6, 7, #7
    vocabulary: Tuple[Tuple[str, tuple], ...],
    query: Optional[str] = None,
) -> float:
    """Return the fraction of rows in the base frame matched by ``vocabulary``."""
    covered = get_chord_vocabulary_mask(
        basis=basis, vocabulary=vocabulary, query=query
    )
    n_covered, n_rows = covered.sum(), len(covered)
    return n_covered / n_rows
# Unique (bass degree, intervals) pairs of the ascending and descending rule
# per mode. dict.fromkeys removes duplicates while keeping first-occurrence
# order; a set would order the pairs differently on every interpreter run
# (string hash randomisation), which also changes the keys under which the
# cached mask functions store their results.
regola_vocabulary_major = tuple(
    dict.fromkeys(regole["ascending_major"] + regole["descending_major"])
)
regola_vocabulary_minor = tuple(
    dict.fromkeys(regole["ascending_minor"] + regole["descending_minor"])
)


def get_coverage_values(
    major_vocabulary: Optional[Tuple[Tuple[str, tuple], ...]] = None,
    minor_vocabulary: Optional[Tuple[Tuple[str, tuple], ...]] = None,
    **name2query,
) -> pd.Series:
    """Compute the coverage of one chord vocabulary per mode.

    For every mode with a non-empty vocabulary, the coverage is computed
    against all bass notes ("all"), against the diatonic ones ("diatonic"),
    and against the diatonic ones restricted by each named query.

    Args:
        major_vocabulary: ``(bass_degree, intervals)`` pairs for major.
        minor_vocabulary: ``(bass_degree, intervals)`` pairs for minor.
        **name2query: Maps a result name to a ``DataFrame.query`` expression.

    Returns:
        Series named "proportion" with the index levels
        ``["mode", "coverage_of"]``, or an empty Series if both vocabularies
        are missing or empty.
    """
    if not (major_vocabulary or minor_vocabulary):
        return pd.Series()
    results = {}
    per_mode = (("major", major_vocabulary), ("minor", minor_vocabulary))
    for mode, vocabulary in per_mode:
        if not vocabulary:
            continue
        diatonic_basis = f"{mode}_diatonic"
        results[(mode, "all")] = get_vocabulary_coverage(f"{mode}_all", vocabulary)
        results[(mode, "diatonic")] = get_vocabulary_coverage(
            diatonic_basis, vocabulary
        )
        for name, query in name2query.items():
            results[(mode, name)] = get_vocabulary_coverage(
                diatonic_basis, vocabulary, query=query
            )
    result = pd.Series(results, name="proportion")
    result.index.names = ["mode", "coverage_of"]
    return result


# Named DataFrame.query expressions, each selecting bass notes by the movement
# that follows ("to_*"), precedes ("from_*"), or both follows and precedes
# ("to_and_from_*") them.
features = {
    "to_ascending": "subsequent_movement_precise == 'ascending'",
    "to_descending": "subsequent_movement_precise == 'descending'",
    "to_either": "subsequent_movement_precise == ['ascending', 'descending']",
    "to_leap": "subsequent_movement == 'leap'",
    "to_same": "subsequent_movement == 'same'",
    "last_notes": "subsequent_movement == 'none'",
    "from_ascending": "preceding_movement_precise == 'ascending'",
    "from_descending": "preceding_movement_precise == 'descending'",
    "from_either": "preceding_movement_precise == ['ascending', 'descending']",
    "from_leap": "preceding_movement == 'leap'",
    "from_same": "preceding_movement == 'same'",
    "first_notes": "preceding_movement == 'none'",
    "to_and_from_ascending": (
        "subsequent_movement_precise == 'ascending' & "
        "preceding_movement_precise == 'ascending'"
    ),
    "to_and_from_descending": (
        "subsequent_movement_precise == 'descending' & "
        "preceding_movement_precise == 'descending'"
    ),
    "to_and_from_either": (
        "subsequent_movement_precise == ['ascending', 'descending'] & "
        "preceding_movement_precise == ['ascending', 'descending']"
    ),
    "to_and_from_leap": "subsequent_movement == 'leap' & preceding_movement == 'leap'",
    "to_and_from_same": "subsequent_movement == 'same' & preceding_movement == 'same'",
}

# Coverage achieved by the complete regola vocabularies of both modes.
regola_coverage = get_coverage_values(
    major_vocabulary=regola_vocabulary_major,
    minor_vocabulary=regola_vocabulary_minor,
    **features,
)
regola_coverage
mode   coverage_of           
major  all                       0.651522
       diatonic                  0.662083
       to_ascending              0.738881
       to_descending             0.723051
       to_either                 0.731538
       to_leap                   0.652388
       to_same                   0.403084
       last_notes                0.827957
       from_ascending            0.680057
       from_descending           0.787728
       from_either               0.730000
       from_leap                 0.665669
       from_same                 0.436123
       first_notes               0.726277
       to_and_from_ascending     0.768293
       to_and_from_descending    0.852814
       to_and_from_either        0.824176
       to_and_from_leap          0.624314
       to_and_from_same          0.444444
minor  all                       0.626947
       diatonic                  0.638242
       to_ascending              0.765468
       to_descending             0.714504
       to_either                 0.740741
       to_leap                   0.643806
       to_same                   0.363229
       last_notes                0.629371
       from_ascending            0.705036
       from_descending           0.714504
       from_either               0.709630
       from_leap                 0.643243
       from_same                 0.461883
       first_notes               0.684783
       to_and_from_ascending     0.811321
       to_and_from_descending    0.745174
       to_and_from_either        0.783835
       to_and_from_leap          0.620690
       to_and_from_same          0.409091
Name: proportion, dtype: float64
# Preview of the regola coverage with the two extra index levels
# ("vocabulary", "rank") prepended.
pd.concat(
    [regola_coverage],
    keys=[("cumulative", "0")],
    names=["vocabulary", "rank"],
).to_frame()
proportion
vocabulary rank mode coverage_of
cumulative 0 major all 0.651522
diatonic 0.662083
to_ascending 0.738881
to_descending 0.723051
to_either 0.731538
to_leap 0.652388
to_same 0.403084
last_notes 0.827957
from_ascending 0.680057
from_descending 0.787728
from_either 0.730000
from_leap 0.665669
from_same 0.436123
first_notes 0.726277
to_and_from_ascending 0.768293
to_and_from_descending 0.852814
to_and_from_either 0.824176
to_and_from_leap 0.624314
to_and_from_same 0.444444
minor all 0.626947
diatonic 0.638242
to_ascending 0.765468
to_descending 0.714504
to_either 0.740741
to_leap 0.643806
to_same 0.363229
last_notes 0.629371
from_ascending 0.705036
from_descending 0.714504
from_either 0.709630
from_leap 0.643243
from_same 0.461883
first_notes 0.684783
to_and_from_ascending 0.811321
to_and_from_descending 0.745174
to_and_from_either 0.783835
to_and_from_leap 0.620690
to_and_from_same 0.409091
# Number of distinct chords in the major and minor regola vocabulary.
tuple(len(vocab) for vocab in (regola_vocabulary_major, regola_vocabulary_minor))
(10, 10)
def make_coverage_plot_data(
    include_singular_vocabularies=True, **features
) -> pd.DataFrame:
    """Compute coverage values for vocabularies built from the chord ranking.

    Chords are ``(bass_degree, intervals_over_bass)`` pairs ranked by relative
    frequency within each mode. For every rank, the coverage of the cumulative
    vocabulary (all chords up to that rank) is computed and, if requested,
    also the coverage of the single chord at that rank.

    Args:
        include_singular_vocabularies: If True, the single-chord results are
            included and the result gets the extra index level "vocabulary"
            with the values "cumulative" and "single".
        **features: Named queries, passed on to ``get_coverage_values``.

    Returns:
        DataFrame with the columns "proportion" and "chord" (string form of
        the chord at the respective rank), indexed by ("vocabulary",) "rank"
        and the index levels of the coverage values.
    """
    chords = BN[["bass_degree", "intervals_over_bass"]].apply(tuple, axis=1)
    ranking = chords.groupby("mode").value_counts(normalize=True)
    # zip_longest pads the shorter ranking with None once it is exhausted.
    ranked_pairs = itertools.zip_longest(
        ranking.loc["major"].index, ranking.loc["minor"].index
    )
    cumulative = {"major": [], "minor": []}
    frames = {}
    for rank, (maj_chord, min_chord) in enumerate(ranked_pairs, start=1):
        if maj_chord:
            cumulative["major"].append(maj_chord)
        if min_chord:
            cumulative["minor"].append(min_chord)
        coverage = get_coverage_values(
            tuple(cumulative["major"]), tuple(cumulative["minor"]), **features
        )
        # Label every row with the chord added at this rank in its mode.
        chord = pd.Series(str(maj_chord), index=coverage.index, name="chord")
        chord.loc["minor"] = str(min_chord)
        cumulative_key = ("cumulative", rank) if include_singular_vocabularies else rank
        frames[cumulative_key] = pd.concat([coverage, chord], axis=1)
        if include_singular_vocabularies:
            single_coverage = get_coverage_values(
                (maj_chord,) if maj_chord else None,
                (min_chord,) if min_chord else None,
                **features,
            )
            frames[("single", rank)] = pd.concat([single_coverage, chord], axis=1)
    level_names = ["rank"]
    if include_singular_vocabularies:
        level_names = ["vocabulary", "rank"]
    return pd.concat(frames, names=level_names)


result = make_coverage_plot_data(**features)
# The regola coverage is added as a "cumulative" entry at rank 10.5,
# presumably to place it right after the ten most frequent chords (both
# regola vocabularies contain ten chords).
regola_results = pd.concat(
    [regola_coverage],
    keys=[("cumulative", 10.5)],
    names=["vocabulary", "rank"],
).to_frame()
regola_results["chord"] = "regola"
result = pd.concat([regola_results, result]).sort_index()
result
proportion chord
vocabulary rank mode coverage_of
cumulative 1.0 major all 0.220880 ('1', ('M3', 'P5'))
diatonic 0.224460 ('1', ('M3', 'P5'))
first_notes 0.237226 ('1', ('M3', 'P5'))
from_ascending 0.252511 ('1', ('M3', 'P5'))
from_descending 0.150912 ('1', ('M3', 'P5'))
... ... ... ... ... ...
single 117.0 minor to_ascending 0.000000 ('7', ('P4', 'P5'))
to_descending 0.000000 ('7', ('P4', 'P5'))
to_either 0.000000 ('7', ('P4', 'P5'))
to_leap 0.000532 ('7', ('P4', 'P5'))
to_same 0.000000 ('7', ('P4', 'P5'))

8930 rows × 2 columns

# Coverage by vocabulary rank: one line per coverage type, one facet column
# per mode, one facet row per vocabulary kind, logarithmic rank axis.
fig = px.line(
    result.reset_index(),
    x="rank",
    y="proportion",
    color="coverage_of",
    hover_name="chord",
    facet_row="vocabulary",
    facet_col="mode",
    log_x=True,
)
style_plotly(
    fig,
    match_facet_yaxes=True,
    height=1500,
    legend={"orientation": "h"},
)